* Firm organization with multiple establishments
* Section III.B: data preparation for the cross-section

*	Start from an empty session
clear all
set more off
set matsize 4000

*	Close any log left open by an earlier run, then start the log of this do-file
capture log close
log using log/09_facts-cs_MEorg_layer-est_data.log, replace

*	Load only the listed variables and only the observations of year 2012
use untid betnr jahr persnr beruf beruf2010 tentgelt w93_3_gen w08_5 ao_kreis ao_bula ///
	hauptbet d_educ lnw limit cens if jahr == 2012 using data/Panel.dta, clear
describe

*	Move the censoring limit, censoring flag and log wage next to tentgelt
order limit cens lnw, after(tentgelt)
	
********************************************************************************
***	Sample restriction *********************************************************
********************************************************************************

*	Multi-establishment firms only: tag one observation per establishment-year
*	and count the tags within firm-year
egen flg_estjhr = tag(betnr jahr)
bysort untid jahr: egen count_est = total(flg_estjhr)
quietly drop if count_est <= 1

*	Minimum firm size: at least 10 person records per firm-year
bysort untid jahr: egen empl_unt = count(persnr)
quietly drop if empl_unt < 10

*	Cleaning: records flagged as censored get the value of limit as their wage;
*	the log wage is then recomputed from the cleaned wage
quietly replace tentgelt = limit if cens == 1
drop limit cens
quietly replace lnw = log(tentgelt)
*	Report how many log wages are missing after the recomputation
count if lnw == .

********************************************************************************
***	Establishment characteristics **********************************************
********************************************************************************

*	Establishment size: number of person records per establishment-year
bysort betnr jahr: egen empl_bet = count(persnr)

*	Attach the establishment-year data and keep matched records only
merge m:1 betnr jahr using data/BHPinclUntID.dta
quietly drop if _merge != 3
drop _merge

*	Remove variables that are not used further below
drop count_est_siab chge_est entry_unt jahr_eins_final jahr_eins_est az_vz eintritt ///
	betnr_vor untid_vor verysmall matchtype ///
	d_multest_4 ever_multest_4 always_multest_4 vorg_died ///
	mode austritt betnr_nach nachf_willbenew untid_nach vorgaenger nachfolger ///
	Sales ValueAdded ///
	erst_jahr_eintritt jahr_lzt_est first_year_ME entry_dyn exit_dyn exit_unt add_est ///
	mode_unt sector sector2 hq_sector

*	Keep the two location variables next to each other
order hq_kreis, after(ao_bula)
	
********************************************************************************
***	Layer classification based on KldB2010, managerial organization ************
********************************************************************************

*	Attach the layer classification by KldB2010 occupation code; occupation codes
*	that only appear in the classification file are discarded
merge m:1 beruf2010 using data/KldB2010_LayerCMRHFriedrich.dta
quietly keep if _merge != 2
drop _merge

*	Manual layer assignments for individual occupation codes
quietly replace layer_neuAG = 3 if beruf2010 == 73294
quietly replace layer_neuAG = 2 if beruf2010 == 71224
quietly replace layer_neuAG = 0 if inlist(beruf2010, 1402, 1302, 1203)
quietly replace layer_neuAG = 1 if beruf2010 == 1104

rename layer_neuAG layer

*	Attach the Blossfeld classification by the beruf occupation code; again only
*	records present in the worker data are kept
merge m:1 beruf using data/KldB1988_Blossfeld.dta
quietly keep if _merge != 2
drop _merge

*	Rank the layers that are occupied within each establishment-year:
*	0 = lowest occupied layer, 1 = second lowest, and so on (at most four ranks).
*	Stata treats a missing value as larger than any number and as equal to another
*	missing value. Without the explicit !missing(layer) guards a worker whose layer
*	is missing would be assigned the first unoccupied rank, which adds a phantom
*	managerial layer and counts the worker as a manager. With the guards such
*	workers keep a missing layer_rank.
bys betnr jahr: egen lowest = min(layer)
bys betnr jahr: egen second_lowest = min(layer) if layer > lowest & !missing(layer)
bys betnr jahr: egen third_lowest = min(layer) if layer > second_lowest & !missing(layer)
bys betnr jahr: egen highest = min(layer) if layer > third_lowest & !missing(layer)

gen layer_rank = .
replace layer_rank = 0 if layer == lowest & !missing(layer)
replace layer_rank = 1 if layer == second_lowest & layer_rank == . & !missing(layer)
replace layer_rank = 2 if layer == third_lowest & layer_rank == . & !missing(layer)
replace layer_rank = 3 if layer == highest & layer_rank == . & !missing(layer)

//	Number of managerial layers: distinct ranks above 0 per firm-year and per
//	establishment-year (egen tag() skips observations with a missing rank)
egen flg_untjhrlay = tag(untid jahr layer_rank)
qui replace flg_untjhrlay = 0 if layer_rank == 0
bys untid jahr: egen count_mgmt_unt = total(flg_untjhrlay)
drop flg_untjhrlay

egen flg_estjhrlay = tag(betnr jahr layer_rank)
qui replace flg_estjhrlay = 0 if layer_rank == 0
bys betnr jahr: egen count_mgmt_bet = total(flg_estjhrlay)
drop flg_estjhrlay

// Combination of layers: one dummy per layer value 0-3 (based on layer, not on
// layer_rank) that is 1 if the establishment-year has at least one such worker
sort betnr jahr
qui {
	forvalues l = 0/3 {
		egen e_`l' = anymatch(layer), values(`l')
		by betnr jahr: egen count_layer`l' = total(e_`l')
		by betnr jahr: gen l_`l' = (count_layer`l' > 0)
		label variable l_`l' "Dummy establishment has layer `l'"
		capture drop e_`l' count_layer`l'
	}
}

// Encode the four dummies as one number: thousands digit = layer 3, ..., units digit = layer 0
capture drop comb_layer_est
gen comb_layer_est = l_3 * 1000 + l_2 * 100 + l_1 * 10 + l_0 
label variable comb_layer_est "Pattern of layers, from high to low, establishment level"
label define structure 1 "0" 10 "1" 100 "2" 1000 "3" 11 "0 + 1" 101 "0 + 2" 1001 "0 + 3" ///
	110 "1 + 2" 1010 "1 + 3" 1100 "2 + 3" 111 "0 + 1 + 2" 1101 "0 + 2 + 3" 1011 "0 + 1 + 3" ///
	1110 "1 + 2 + 3" 1111 "All layers"
label values comb_layer_est structure

//	Management share (employment)
//	A missing layer_rank must not count as management: (layer_rank > 0) alone is
//	true for missing values, hence the explicit guard.
gen mgmt_lay = (layer_rank > 0 & !missing(layer_rank))
bys betnr jahr: egen nbr_mgmt_bet = total(mgmt_lay)
bys untid jahr: egen nbr_mgmt_unt = total(mgmt_lay)
gen shr_mgmt_bet = (nbr_mgmt_bet / empl_bet) * 100
gen shr_mgmt_unt = (nbr_mgmt_unt / empl_unt) * 100
//	Non-managerial headcount = total headcount minus managerial headcount
gen prdt_wkrs_unt = empl_unt - nbr_mgmt_unt
gen prdt_wkrs_bet = empl_bet - nbr_mgmt_bet
drop nbr_mgmt*

//	Management share of the wage sum (layer-based definition), in percent
//	Firm level
bysort untid jahr: egen wage_sum_unt = total(tentgelt)
bysort untid jahr: egen wage_sum_mgmt = total(mgmt_lay * tentgelt)
gen shr_mgmt_w_unt = (wage_sum_mgmt / wage_sum_unt) * 100
//	Establishment level
bysort betnr jahr: egen wage_sum_bet = total(tentgelt)
bysort betnr jahr: egen wage_sum_mgmt_bet = total(mgmt_lay * tentgelt)
gen shr_mgmt_w_bet = (wage_sum_mgmt_bet / wage_sum_bet) * 100

//	Management share (Blossfeld definition): a worker is a manager if blossfeld == 12
gen d_mgr = (blossfeld == 12)
//	Employment shares in percent
bysort betnr jahr: egen shr_mgmt_bet_bloss = mean(d_mgr * 100)
bysort untid jahr: egen shr_mgmt_unt_bloss = mean(d_mgr * 100)
//	Wage-sum shares in percent, reusing the total wage sums computed above
bysort untid jahr: egen wage_unt_bloss = total(d_mgr * tentgelt)
gen shr_bloss_w_unt = (wage_unt_bloss / wage_sum_unt) * 100
bysort betnr jahr: egen wage_bet_bloss = total(d_mgr * tentgelt)
gen shr_bloss_w_bet = (wage_bet_bloss / wage_sum_bet) * 100
//	Remove the helper wage sums and the manager dummy
drop wage_sum_* d_mgr

********************************************************************************
***	Establishment characteristics (ctd.) ***************************************
********************************************************************************

*	Collapse to one record per establishment-year (the tagged observation) and
*	remove everything that varies across persons within an establishment
qui keep if flg_estjhr == 1
drop flg_estjhr
drop persnr beruf beruf2010 tentgelt lnw layer layer_neu ///
		lowest second_lowest third_lowest highest layer_rank mgmt_lay blossfeld // Person-specific variables

*	3-digit industry code: integer part of the w08_5 code divided by 100
capture drop w08_3
qui gen w08_3 = int(w08_5 / 100)
order w08_3, after(w08_5)

*	Number of distinct 3-digit industries within firm-year
egen flg_wz = tag(untid jahr w08_3)
bys untid jahr: egen count_wz = total(flg_wz)
drop flg_wz

*	Industry of the establishment flagged hauptbet == 1, spread to all
*	establishments of the firm-year
qui gen aux_hq_wz = w08_3 if hauptbet == 1
bys untid jahr: egen hq_wz = min(aux_hq_wz)
drop aux_hq_wz

*	Same for the w93_3_gen industry code. The helper is referenced by its full
*	name; the previous code used the already-dropped name aux_hq_wz and only
*	worked through variable-name abbreviation (it fails under set varabbrev off).
qui gen aux_hq_wz93 = w93_3_gen if hauptbet == 1
bys untid jahr: egen hq_wz93 = min(aux_hq_wz93)
drop aux_hq_wz93
order hq_wz93 hq_wz, after(w08_3)

*	Log of the non-managerial headcount of the establishment (missing if that headcount is 0)
gen ln_prdt_workers = log(prdt_wkrs_bet)
label variable ln_prdt_workers "Log \# non-m. employees of est."

drop w08_5 ao_bula legal_cat

********************************************************************************
***	Firm characteristics *******************************************************
********************************************************************************

*	Attach firm-year data; firm-years that only appear in the using file are discarded
merge m:1 untid jahr using data/Amadeus_Aug2018_untid.dta
quietly keep if _merge != 2
drop _merge
drop NACE Empl OperRev ValueAdded ln_sales ln_operrev

//	Geography
//	Distance is set to zero when hq_kreis and ao_kreis coincide
replace distance     = 0 if ao_kreis == hq_kreis
replace distance_all = 0 if ao_kreis == hq_kreis
//	Recompute the log distances from the adjusted distances
//	(the log of a zero distance is missing)
replace ldistance = ln(distance)
replace ldistall  = ln(distance_all)
//	Largest (log) distance within firm-year
bysort untid jahr: egen max_log_dist = max(ldistall)
bysort untid jahr: egen max_dist = max(distance_all)

label variable ldistall     "Log distance to HQ"
label variable max_log_dist "Maximum log distance to HQ"

//	Area covered by the firm, computed on a temporary copy of the data
preserve
//	Only firms with more than two establishments enter the area computation
quietly drop if count_est <= 2
keep untid jahr betnr ao_kreis
duplicates drop

//	Attach coordinates by ao_kreis; records that only exist in the coordinate file are discarded
merge m:1 ao_kreis using data/Middle_coordinates_max-pop.dta
keep if _merge != 2
drop _merge

//	Numeric firm-year identifier: untid * 10000 + jahr
gen double aux = untid * 10000
gen double untjhr = aux + jahr

//	fieldarea is a user-written command; NOTE(review): it presumably leaves one
//	record per untjhr, as required by the m:1 merge below -- confirm
fieldarea lon lat, id(untjhr) generate(area) unit(sqkm)
tabstat area, statistics(n mean sd min p10 q p90 max)
// Germany has 360,000 sqkm

save data/MEfirm_area_cs.dta, replace
restore

//	Build the same firm-year identifier in the main data
gen double untjhr = untid * 10000 + jahr

capture drop flg_untjhr
egen flg_untjhr = tag(untid jahr)

//	Bring the area back; list the number of establishments of firms without an area
merge m:1 untjhr using data/MEfirm_area_cs.dta
keep if _merge != 2
tabulate count_est if _merge == 1 & flg_untjhr == 1, missing sort
drop _merge untjhr

quietly gen ln_area = ln(area)

//	Size
gen ln_prdt_wkr_unt = ln(prdt_wkrs_unt)
label variable ln_prdt_wkr_unt "Log \# non-m. employees of firm"

quietly gen ln_sales = ln(Sales)
label variable ln_sales "Log sales of firm"

// Legal form: group the Rechtsform_neu codes into four categories;
// codes not listed below keep a missing legal_form
quietly gen legal_form = .
quietly replace legal_form = 1 if inlist(Rechtsform_neu, 8, 16, 17, 19, 29)
quietly replace legal_form = 2 if inlist(Rechtsform_neu, 3, 11, 12, 14, 15)
quietly replace legal_form = 3 if Rechtsform_neu == 13
quietly replace legal_form = 4 if inlist(Rechtsform_neu, 5, 22, 23, 24)
label define legal 1 "Einzelunternehmen" 2 "GmbH & coKG" 3 "GmbH" 4 "AG"
label values legal_form legal

// One set of indicator variables per categorical variable
quietly {
	tabulate hq_wz,      generate(d_08wz) // hq_wz is based on w08_3
	tabulate hq_wz93,    generate(d_93wz)
	tabulate w08_3,      generate(d_est_08wz)
	tabulate w93_3_gen,  generate(d_est_93wz)
	tabulate ao_kreis,   generate(d_ao_kreis)
	tabulate hq_kreis,   generate(d_hq_kreis)
	tabulate legal_form, generate(d_legal)
}

// Shrink storage types and write the final establishment-level data set
compress
save "data/MEorg_2012_cs_layer-est.dta", replace

********************************************************************************
********************************************************************************

log close
